rm(list = ls())

library(DescTools)
library(tidyverse)
library(fastDummies)
library(pbapply)

## Data source: https://search.gesis.org/research_data/ZA7753
## Note that this figure requires data that we cannot make available publicly

dat <- haven::read_dta('data_private/ZA7753_v1-0-0.dta')

## Number of unique responses

length(unique(dat$V9))

## Select MIS and Sonntagsfrage variable

dat <- dat %>%
  mutate(vote_green_bin = ifelse(V12 == 6, 1, 0)) %>% 
  dplyr::select(one_of('V9', 'vote_green_bin', 'V6')) 

## Dummy columns for most important issue variable 

dat <- fastDummies::dummy_cols(dat, 'V9',
                               remove_first_dummy = F, 
                               remove_selected_columns = T)

## Exclude NA category 

dat$V9_47 <- NULL 
dat$V9_99 <- NULL 

## 

issue_vars <- names(dat)[str_detect(names(dat), 'V9')]

## Get R2 value for the "full" model including all issues 
## This serves as the baseline comparison 

m_formula_full <- as.formula(paste0('vote_green_bin ~ ', paste(issue_vars, collapse = '+')))

m_full <- glm(m_formula_full, 
              family = binomial(link = 'logit'),
              data = dat)

pseudo_r2_full <- as.vector(PseudoR2(m_full, which = 'McFadden'))

## Share R2 compared to full model for each variable

results <- pblapply(issue_vars, function(issue_covar){
  
  ## Define model spec
  
  m_formula <- as.formula(paste0('vote_green_bin ~ ', issue_covar))
  
  ## Estimate logistic model 
  
  res <- glm(m_formula, 
             family = binomial(link = 'logit'),
             data = dat)
  
  ## Get McFadden’s R2
  
  pseudo_r2 <- as.vector(PseudoR2(res, which = 'McFadden'))
  
  return(data.frame(var_name = issue_covar,
                    pseudo_r2 = pseudo_r2))
  
}) %>%
  reduce(bind_rows) %>%
  mutate(pseudo_r2_share = pseudo_r2 / pseudo_r2_full,
         var_name = fct_reorder(var_name, pseudo_r2_share))

## Retain only top issues 

n_retain <- 5

results <- results %>%
  filter(var_name != 'V9_46') %>% ## exclude "other" category
  top_n(n_retain, pseudo_r2_share)

## Recode issues 

results <- results %>%
  mutate(var_label = recode(var_name,
                            'V9_13' = 'Climate change / environment',
                            'V9_2' = 'Immigration',
                            'V9_31' = 'Crime, Law and Order',
                            'V9_5' = 'Right-wing extremism',
                            'V9_14' = 'Energy',
                            'V9_28' = 'Dissatisfaction\nwith democracy',
                            'V9_25' = 'Economics',
                            'V9_7' = 'Pensions',
                            'V9_6' = 'Strength of the AfD',
                            'V9_1' = 'Unemployment'))


## Plot This

p1 <- ggplot(results, aes(x = var_label, y = pseudo_r2_share)) + 
  geom_bar(stat = 'identity',
           width = 0.8) + 
  theme_bw() + 
  coord_flip() + 
  labs(y = 'Share of variance',
       x = '') + 
  scale_y_continuous(breaks = seq(0, 1, .1),
                     limits = c(0, 1),
                     labels = scales::percent_format(accuracy = 1))  

p1 

